{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "V100",
      "authorship_tag": "ABX9TyPBOWBw9vF2Wuw2s1/2u4qn",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/KevinWang676/Bark-Voice-Cloning/blob/main/Sambert_Voice_Cloning_in_One_Click.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# 全新中文声音克隆 Voice Cloning for Chinese Speech"
      ],
      "metadata": {
        "id": "Uhhc4_stcdSf"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 配置环境 Set up"
      ],
      "metadata": {
        "id": "qIFF53SWVDe-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "! nvidia-smi # 需要使用GPU运行"
      ],
      "metadata": {
        "id": "4RZJ1P69VKLU",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4bf174fb-3131-47ce-a5d5-def6caba4ad4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Sat Sep  9 07:28:21 2023       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   37C    P0    25W / 300W |      0MiB / 16384MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!git clone https://huggingface.co/spaces/kevinwang676/Personal-TTS-v2"
      ],
      "metadata": {
        "id": "e_OKqHl1gwlJ",
        "outputId": "3dea2e2b-1e1c-4530-e6ee-432cf1db1b12",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "fatal: destination path 'Personal-TTS-v2' already exists and is not an empty directory.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cd Personal-TTS-v2"
      ],
      "metadata": {
        "id": "6vcoOVAag0eP",
        "outputId": "0eff3546-030f-4755-eb47-921c01bf20bb",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/Personal-TTS-v2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! pip install openai-whisper\n",
        "! pip install modelscope\n",
        "! pip install tts-autolabel -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
        "! pip install typeguard==2.3.1\n",
        "! pip install sox\n",
        "! pip install bitstring\n",
        "! pip install pysptk --no-build-isolation\n",
        "! pip install kantts -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
        "! pip install pytorch_wavelets\n",
        "! pip install tensorboardX\n",
        "! git clone https://github.com/fbcotter/pytorch_wavelets\n",
        "! pip install matplotlib\n",
        "! pip install numpy==1.22.0\n",
        "! apt-get install sox"
      ],
      "metadata": {
        "id": "s2aAbOEPaVh6",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "7b7c59de-cc9b-4529-e718-97a89c902713"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: openai-whisper in /usr/local/lib/python3.10/dist-packages (20230314)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.0)\n",
            "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.56.4)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (1.22.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (2.0.1+cu118)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (4.66.1)\n",
            "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (10.1.0)\n",
            "Requirement already satisfied: tiktoken==0.3.1 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.3.1)\n",
            "Requirement already satisfied: ffmpeg-python==0.2.0 in /usr/local/lib/python3.10/dist-packages (from openai-whisper) (0.2.0)\n",
            "Requirement already satisfied: future in /usr/local/lib/python3.10/dist-packages (from ffmpeg-python==0.2.0->openai-whisper) (0.18.3)\n",
            "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2023.6.3)\n",
            "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken==0.3.1->openai-whisper) (2.31.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.27.4.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (3.12.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->openai-whisper) (16.0.6)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper) (67.7.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper) (3.1.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken==0.3.1->openai-whisper) (2023.7.22)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->openai-whisper) (1.3.0)\n",
            "Requirement already satisfied: modelscope in /usr/local/lib/python3.10/dist-packages (1.9.0)\n",
            "Requirement already satisfied: addict in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.4.0)\n",
            "Requirement already satisfied: attrs in /usr/local/lib/python3.10/dist-packages (from modelscope) (23.1.0)\n",
            "Requirement already satisfied: datasets<=2.13.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.13.0)\n",
            "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.6.1)\n",
            "Requirement already satisfied: filelock>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (3.12.2)\n",
            "Requirement already satisfied: gast>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.4.0)\n",
            "Requirement already satisfied: ms-swift in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.0.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.22.0)\n",
            "Requirement already satisfied: oss2 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.18.1)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.5.3)\n",
            "Requirement already satisfied: Pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (9.4.0)\n",
            "Requirement already satisfied: pyarrow!=9.0.0,>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (13.0.0)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.8.2)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from modelscope) (6.0.1)\n",
            "Requirement already satisfied: requests>=2.25 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.31.0)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from modelscope) (1.10.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from modelscope) (67.7.2)\n",
            "Requirement already satisfied: simplejson>=3.3.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (3.19.1)\n",
            "Requirement already satisfied: sortedcontainers>=1.5.9 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.4.0)\n",
            "Requirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from modelscope) (4.66.1)\n",
            "Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.10/dist-packages (from modelscope) (2.0.4)\n",
            "Requirement already satisfied: yapf in /usr/local/lib/python3.10/dist-packages (from modelscope) (0.40.1)\n",
            "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.3.6)\n",
            "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (3.3.0)\n",
            "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.70.14)\n",
            "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (2023.6.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (3.8.5)\n",
            "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (0.16.4)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets<=2.13.0,>=2.8.0->modelscope) (23.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.1->modelscope) (1.16.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (3.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.25->modelscope) (2023.7.22)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.22.0)\n",
            "Requirement already satisfied: diffusers>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.20.2)\n",
            "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.5.0)\n",
            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (0.3.3)\n",
            "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (2.13.0)\n",
            "Requirement already satisfied: transformers>=4.12.0 in /usr/local/lib/python3.10/dist-packages (from ms-swift->modelscope) (4.33.1)\n",
            "Requirement already satisfied: crcmod>=1.7 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (1.7)\n",
            "Requirement already satisfied: pycryptodome>=3.4.7 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (3.18.0)\n",
            "Requirement already satisfied: aliyun-python-sdk-kms>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (2.16.1)\n",
            "Requirement already satisfied: aliyun-python-sdk-core>=2.13.12 in /usr/local/lib/python3.10/dist-packages (from oss2->modelscope) (2.13.36)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->modelscope) (2023.3.post1)\n",
            "Requirement already satisfied: importlib-metadata>=6.6.0 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (6.8.0)\n",
            "Requirement already satisfied: platformdirs>=3.5.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (3.10.0)\n",
            "Requirement already satisfied: tomli>=2.0.1 in /usr/local/lib/python3.10/dist-packages (from yapf->modelscope) (2.0.1)\n",
            "Requirement already satisfied: jmespath<1.0.0,>=0.9.3 in /usr/local/lib/python3.10/dist-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (0.10.0)\n",
            "Requirement already satisfied: cryptography>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (41.0.3)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from diffusers>=0.18.0->ms-swift->modelscope) (2023.6.3)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (6.0.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (4.0.3)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.9.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.4.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets<=2.13.0,>=2.8.0->modelscope) (1.3.1)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets<=2.13.0,>=2.8.0->modelscope) (4.5.0)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=6.6.0->yapf->modelscope) (3.16.2)\n",
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.12.0->ms-swift->modelscope) (0.13.3)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->ms-swift->modelscope) (5.9.5)\n",
            "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate->ms-swift->modelscope) (2.0.1+cu118)\n",
            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.4.0)\n",
            "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.57.0)\n",
            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (2.17.3)\n",
            "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (1.0.0)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (3.4.4)\n",
            "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (3.20.3)\n",
            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (0.7.1)\n",
            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (2.3.7)\n",
            "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard->ms-swift->modelscope) (0.41.2)\n",
            "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (1.15.1)\n",
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (5.3.1)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (0.3.0)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (4.9)\n",
            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->ms-swift->modelscope) (1.3.1)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate->ms-swift->modelscope) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate->ms-swift->modelscope) (3.27.4.1)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate->ms-swift->modelscope) (16.0.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard->ms-swift->modelscope) (2.1.3)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=2.6.0->aliyun-python-sdk-core>=2.13.12->oss2->modelscope) (2.21)\n",
            "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->ms-swift->modelscope) (0.5.0)\n",
            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->ms-swift->modelscope) (3.2.2)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate->ms-swift->modelscope) (1.3.0)\n",
            "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
            "Requirement already satisfied: tts-autolabel in /usr/local/lib/python3.10/dist-packages (1.1.8)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.1+cu118)\n",
            "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.0.2+cu118)\n",
            "Requirement already satisfied: onnxruntime in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.15.1)\n",
            "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.10.1)\n",
            "Requirement already satisfied: numpy<=1.23.1 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.22.0)\n",
            "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.4.1)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (6.0.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (4.66.1)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.10.1)\n",
            "Requirement already satisfied: nls-fa in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.1)\n",
            "Requirement already satisfied: kaldi-native-fbank in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (1.18.4)\n",
            "Requirement already satisfied: typeguard<=2.13.3 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (2.3.1)\n",
            "Requirement already satisfied: ttsfrd>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tts-autolabel) (0.2.1)\n",
            "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (3.0.0)\n",
            "Collecting numpy<=1.23.1 (from tts-autolabel)\n",
            "  Using cached numpy-1.23.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.0 MB)\n",
            "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.2.2)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.3.2)\n",
            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.4.2)\n",
            "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.56.4)\n",
            "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.12.1)\n",
            "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.7.0)\n",
            "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3.6)\n",
            "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (4.5.0)\n",
            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (0.3)\n",
            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->tts-autolabel) (1.0.5)\n",
            "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (15.0.1)\n",
            "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.5.26)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (23.1)\n",
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (3.20.3)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime->tts-autolabel) (1.12)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.12.2)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->tts-autolabel) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (3.27.4.1)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->tts-autolabel) (16.0.6)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa->tts-autolabel) (67.7.2)\n",
            "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (3.10.0)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa->tts-autolabel) (2.31.0)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->tts-autolabel) (3.2.0)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa->tts-autolabel) (1.15.1)\n",
            "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->onnxruntime->tts-autolabel) (10.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->tts-autolabel) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime->tts-autolabel) (1.3.0)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa->tts-autolabel) (2.21)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->tts-autolabel) (2023.7.22)\n",
            "Installing collected packages: numpy\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.22.0\n",
            "    Uninstalling numpy-1.22.0:\n",
            "      Successfully uninstalled numpy-1.22.0\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed numpy-1.23.1\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "numpy"
                ]
              }
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: typeguard==2.3.1 in /usr/local/lib/python3.10/dist-packages (2.3.1)\n",
            "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (1.4.1)\n",
            "Requirement already satisfied: numpy>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from sox) (1.23.1)\n",
            "Requirement already satisfied: bitstring in /usr/local/lib/python3.10/dist-packages (4.1.2)\n",
            "Requirement already satisfied: bitarray<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from bitstring) (2.8.1)\n",
            "Requirement already satisfied: pysptk in /usr/local/lib/python3.10/dist-packages (0.2.1)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from pysptk) (1.10.1)\n",
            "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from pysptk) (4.4.2)\n",
            "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk) (0.29.36)\n",
            "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy->pysptk) (1.23.1)\n",
            "Looking in links: https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html\n",
            "Requirement already satisfied: kantts in /usr/local/lib/python3.10/dist-packages (1.0.1)\n",
            "Requirement already satisfied: librosa>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (0.10.1)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from kantts) (3.7.1)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from kantts) (1.23.1)\n",
            "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from kantts) (0.56.4)\n",
            "Requirement already satisfied: unidecode in /usr/local/lib/python3.10/dist-packages (from kantts) (1.3.6)\n",
            "Requirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from kantts) (7.0.0)\n",
            "Requirement already satisfied: pywavelets>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
            "Requirement already satisfied: scikit-learn>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.2.2)\n",
            "Requirement already satisfied: scipy>=1.7.3 in /usr/local/lib/python3.10/dist-packages (from kantts) (1.10.1)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kantts) (4.66.1)\n",
            "Requirement already satisfied: pysptk in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
            "Requirement already satisfied: sox in /usr/local/lib/python3.10/dist-packages (from kantts) (1.4.1)\n",
            "Requirement already satisfied: ttsfrd in /usr/local/lib/python3.10/dist-packages (from kantts) (0.2.1)\n",
            "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (3.0.0)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.3.2)\n",
            "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.4.2)\n",
            "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.12.1)\n",
            "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.7.0)\n",
            "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3.6)\n",
            "Requirement already satisfied: typing-extensions>=4.1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (4.5.0)\n",
            "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (0.3)\n",
            "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.9.2->kantts) (1.0.5)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba->kantts) (67.7.2)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0.2->kantts) (3.2.0)\n",
            "Requirement already satisfied: pydantic>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from inflect->kantts) (1.10.12)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (1.4.5)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (23.1)\n",
            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (9.4.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->kantts) (2.8.2)\n",
            "Requirement already satisfied: cython>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from pysptk->kantts) (0.29.36)\n",
            "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (3.10.0)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa>=0.9.2->kantts) (2.31.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->kantts) (1.16.0)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa>=0.9.2->kantts) (1.15.1)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa>=0.9.2->kantts) (2.21)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa>=0.9.2->kantts) (2023.7.22)\n",
            "Requirement already satisfied: pytorch_wavelets in /usr/local/lib/python3.10/dist-packages (1.3.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.23.1)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (1.16.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch_wavelets) (2.0.1+cu118)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.12.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch_wavelets) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (3.27.4.1)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch_wavelets) (16.0.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch_wavelets) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch_wavelets) (1.3.0)\n",
            "Requirement already satisfied: tensorboardX in /usr/local/lib/python3.10/dist-packages (2.6.2.2)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (1.23.1)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (23.1)\n",
            "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.10/dist-packages (from tensorboardX) (3.20.3)\n",
            "fatal: destination path 'pytorch_wavelets' already exists and is not an empty directory.\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.5)\n",
            "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.23.1)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (23.1)\n",
            "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (9.4.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (2.8.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
            "Collecting numpy==1.22.0\n",
            "  Using cached numpy-1.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)\n",
            "Installing collected packages: numpy\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.23.1\n",
            "    Uninstalling numpy-1.23.1:\n",
            "      Successfully uninstalled numpy-1.23.1\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "librosa 0.10.1 requires numpy!=1.22.0,!=1.22.1,!=1.22.2,>=1.20.3, but you have numpy 1.22.0 which is incompatible.\n",
            "pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 13.0.0 which is incompatible.\n",
            "plotnine 0.12.3 requires numpy>=1.23.0, but you have numpy 1.22.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed numpy-1.22.0\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "numpy"
                ]
              }
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Reading package lists... Done\n",
            "Building dependency tree... Done\n",
            "Reading state information... Done\n",
            "sox is already the newest version (14.4.2+git20190427-2+deb11u2ubuntu0.22.04.1).\n",
            "0 upgraded, 0 newly installed, 0 to remove and 16 not upgraded.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cd pytorch_wavelets"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "A5mtXiAEUeOi",
        "outputId": "a2fa5038-a929-4b78-f9d0-0f0b8ed7f02b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/Personal-TTS-v2/pytorch_wavelets\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "pip install ."
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0279o9cEUgYo",
        "outputId": "4da47234-d8b2-448f-b494-bb7acc477994"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Processing /content/Personal-TTS-v2/pytorch_wavelets\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.22.0)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (1.16.0)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from pytorch-wavelets==1.3.0) (2.0.1+cu118)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.12.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->pytorch-wavelets==1.3.0) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (3.27.4.1)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->pytorch-wavelets==1.3.0) (16.0.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->pytorch-wavelets==1.3.0) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->pytorch-wavelets==1.3.0) (1.3.0)\n",
            "Building wheels for collected packages: pytorch-wavelets\n",
            "  Building wheel for pytorch-wavelets (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pytorch-wavelets: filename=pytorch_wavelets-1.3.0-py3-none-any.whl size=54852 sha256=cd4232f63138e8a8d6e09752e8296074b3653966a7e03a90ef6c44dd7a69f88a\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-hlkhgtfy/wheels/9a/f5/a7/b8c901126021c077bfd6fbe9f8100e26345c85e77e49f5d444\n",
            "Successfully built pytorch-wavelets\n",
            "Installing collected packages: pytorch-wavelets\n",
            "  Attempting uninstall: pytorch-wavelets\n",
            "    Found existing installation: pytorch-wavelets 1.3.0\n",
            "    Uninstalling pytorch-wavelets-1.3.0:\n",
            "      Successfully uninstalled pytorch-wavelets-1.3.0\n",
            "Successfully installed pytorch-wavelets-1.3.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cd .."
      ],
      "metadata": {
        "id": "GpzRO2X6irTm",
        "outputId": "b2b3f8de-763f-4987-e326-cf20b1683859",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/Personal-TTS-v2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import sox\n",
        "! pip install gradio\n",
        "import gradio as gr"
      ],
      "metadata": {
        "id": "TOvKb2-phId5",
        "outputId": "65526142-deb1-4600-98ae-00281ebaa66b",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: gradio in /usr/local/lib/python3.10/dist-packages (3.43.2)\n",
            "Requirement already satisfied: aiofiles<24.0,>=22.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (23.2.1)\n",
            "Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.2.2)\n",
            "Requirement already satisfied: fastapi in /usr/local/lib/python3.10/dist-packages (from gradio) (0.103.1)\n",
            "Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/dist-packages (from gradio) (0.3.1)\n",
            "Requirement already satisfied: gradio-client==0.5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.5.0)\n",
            "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from gradio) (0.24.1)\n",
            "Requirement already satisfied: huggingface-hub>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.16.4)\n",
            "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
            "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.2)\n",
            "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.3)\n",
            "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n",
            "Requirement already satisfied: numpy~=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.22.0)\n",
            "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.9.7)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (23.1)\n",
            "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.5.3)\n",
            "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (9.4.0)\n",
            "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.10.12)\n",
            "Requirement already satisfied: pydub in /usr/local/lib/python3.10/dist-packages (from gradio) (0.25.1)\n",
            "Requirement already satisfied: python-multipart in /usr/local/lib/python3.10/dist-packages (from gradio) (0.0.6)\n",
            "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.1)\n",
            "Requirement already satisfied: requests~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.31.0)\n",
            "Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.10.0)\n",
            "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.5.0)\n",
            "Requirement already satisfied: uvicorn>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.23.2)\n",
            "Requirement already satisfied: websockets<12.0,>=10.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (11.0.3)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.5.0->gradio) (2023.6.0)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.4)\n",
            "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (4.19.0)\n",
            "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio) (0.12.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (3.12.2)\n",
            "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.14.0->gradio) (4.66.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.1.0)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.11.0)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.42.1)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2023.3.post1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests~=2.0->gradio) (2023.7.22)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (8.1.7)\n",
            "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn>=0.14.0->gradio) (0.14.0)\n",
            "Requirement already satisfied: anyio<4.0.0,>=3.7.1 in /usr/local/lib/python3.10/dist-packages (from fastapi->gradio) (3.7.1)\n",
            "Requirement already satisfied: starlette<0.28.0,>=0.27.0 in /usr/local/lib/python3.10/dist-packages (from fastapi->gradio) (0.27.0)\n",
            "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (0.17.3)\n",
            "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio) (1.3.0)\n",
            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4.0.0,>=3.7.1->fastapi->gradio) (1.1.3)\n",
            "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.1.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.7.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.30.2)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.10.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import subprocess\n",
        "import random\n",
        "import os\n",
        "from pathlib import Path\n",
        "import librosa\n",
        "from scipy.io import wavfile\n",
        "import numpy as np\n",
        "import torch\n",
        "import csv\n",
        "import whisper\n",
        "\n",
        "def split_long_audio(model, filepaths, save_dir=\"data_dir\", out_sr=44100):\n",
        "    if isinstance(filepaths, str):\n",
        "        filepaths = [filepaths]\n",
        "\n",
        "    for file_idx, filepath in enumerate(filepaths):\n",
        "\n",
        "        save_path = Path(save_dir)\n",
        "        save_path.mkdir(exist_ok=True, parents=True)\n",
        "\n",
        "        print(f\"Transcribing file {file_idx}: '{filepath}' to segments...\")\n",
        "        result = model.transcribe(filepath, word_timestamps=True, task=\"transcribe\", beam_size=5, best_of=5)\n",
        "        segments = result['segments']\n",
        "\n",
        "        wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)\n",
        "        wav, _ = librosa.effects.trim(wav, top_db=20)\n",
        "        peak = np.abs(wav).max()\n",
        "        if peak > 1.0:\n",
        "            wav = 0.98 * wav / peak\n",
        "        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)\n",
        "        wav2 /= max(wav2.max(), -wav2.min())\n",
        "\n",
        "        for i, seg in enumerate(segments):\n",
        "            start_time = seg['start']\n",
        "            end_time = seg['end']\n",
        "            wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)]\n",
        "            wav_seg_name = f\"{file_idx}_{i}.wav\"\n",
        "            out_fpath = save_path / wav_seg_name\n",
        "            wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16))\n",
        "\n",
        "whisper_size = \"medium\"\n",
        "whisper_model = whisper.load_model(whisper_size)"
      ],
      "metadata": {
        "id": "NdoD-ZnIaWhN"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from modelscope.tools import run_auto_label\n",
        "\n",
        "from modelscope.models.audio.tts import SambertHifigan\n",
        "from modelscope.pipelines import pipeline\n",
        "from modelscope.utils.constant import Tasks\n",
        "\n",
        "from modelscope.metainfo import Trainers\n",
        "from modelscope.trainers import build_trainer\n",
        "from modelscope.utils.audio.audio_utils import TtsTrainType\n",
        "\n",
        "pretrained_model_id = 'damo/speech_personal_sambert-hifigan_nsf_tts_zh-cn_pretrain_16k'\n",
        "\n",
        "dataset_id = \"/content/Personal-TTS-v2/output_training_data/\"\n",
        "pretrain_work_dir = \"/content/Personal-TTS-v2/pretrain_work_dir/\"\n",
        "\n",
        "\n",
        "def auto_label(audio):\n",
        "    try:\n",
        "        split_long_audio(whisper_model, audio, \"/content/Personal-TTS-v2/test_wavs\")\n",
        "        input_wav = \"/content/Personal-TTS-v2/test_wavs/\"\n",
        "        output_data = \"/content/Personal-TTS-v2/output_training_data/\"\n",
        "        ret, report = run_auto_label(input_wav=input_wav, work_dir=output_data, resource_revision=\"v1.0.7\")\n",
        "\n",
        "    except Exception:\n",
        "        pass\n",
        "    return \"标注成功\"\n",
        "\n",
        "\n",
        "def train(a):\n",
        "    try:\n",
        "        train_info = {\n",
        "            TtsTrainType.TRAIN_TYPE_SAMBERT: {  # 配置训练AM（sambert）模型\n",
        "                'train_steps': 52,               # 训练多少个step\n",
        "                'save_interval_steps': 50,       # 每训练多少个step保存一次checkpoint\n",
        "                'log_interval': 10               # 每训练多少个step打印一次训练日志\n",
        "            }\n",
        "        }\n",
        "\n",
        "        # 配置训练参数，指定数据集，临时工作目录和train_info\n",
        "        kwargs = dict(\n",
        "            model=pretrained_model_id,                  # 指定要finetune的模型\n",
        "            model_revision = \"v1.0.6\",\n",
        "            work_dir=pretrain_work_dir,                 # 指定临时工作目录\n",
        "            train_dataset=dataset_id,                   # 指定数据集id\n",
        "            train_type=train_info                       # 指定要训练类型及参数\n",
        "        )\n",
        "\n",
        "        trainer = build_trainer(Trainers.speech_kantts_trainer,\n",
        "                            default_args=kwargs)\n",
        "\n",
        "        trainer.train()\n",
        "\n",
        "    except Exception:\n",
        "        pass\n",
        "\n",
        "    return \"训练完成\"\n",
        "\n",
        "\n",
        "import random\n",
        "\n",
        "def infer(text):\n",
        "\n",
        "  model_dir = \"/content/Personal-TTS-v2/pretrain_work_dir\"\n",
        "\n",
        "  custom_infer_abs = {\n",
        "      'voice_name':\n",
        "      'F7',\n",
        "      'am_ckpt':\n",
        "      os.path.join(model_dir, 'tmp_am', 'ckpt'),\n",
        "      'am_config':\n",
        "      os.path.join(model_dir, 'tmp_am', 'config.yaml'),\n",
        "      'voc_ckpt':\n",
        "      os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan', 'ckpt'),\n",
        "      'voc_config':\n",
        "      os.path.join(model_dir, 'orig_model', 'basemodel_16k', 'hifigan',\n",
        "              'config.yaml'),\n",
        "      'audio_config':\n",
        "      os.path.join(model_dir, 'data', 'audio_config.yaml'),\n",
        "      'se_file':\n",
        "      os.path.join(model_dir, 'data', 'se', 'se.npy')\n",
        "  }\n",
        "  kwargs = {'custom_ckpt': custom_infer_abs}\n",
        "\n",
        "  model_id = SambertHifigan(os.path.join(model_dir, \"orig_model\"), **kwargs)\n",
        "\n",
        "  inference = pipeline(task=Tasks.text_to_speech, model=model_id)\n",
        "  output = inference(input=text)\n",
        "\n",
        "  filename = str(random.randint(1, 1000000000000))\n",
        "\n",
        "  with open(filename + \"myfile.wav\", mode='bx') as f:\n",
        "      f.write(output[\"output_wav\"])\n",
        "  return filename + \"myfile.wav\""
      ],
      "metadata": {
        "id": "soFE76hH-te7",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "96ff1a60-33b7-437f-f1f7-be538c5bc747"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2023-09-09 07:29:57,174 - modelscope - INFO - PyTorch version 2.0.1+cu118 Found.\n",
            "2023-09-09 07:29:57,178 - modelscope - INFO - TensorFlow version 2.13.0 Found.\n",
            "2023-09-09 07:29:57,179 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer\n",
            "2023-09-09 07:29:57,214 - modelscope - INFO - Loading done! Current index file version is 1.9.0, with md5 26bab0b425de9d3ec01e3de23e7c0f4d and a total number of 921 components indexed\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "auto_label(\"nana_speech.wav\")\n",
        "train(\"test\")\n",
        "infer(\"欢迎使用滔滔智能的声音克隆产品\")"
      ],
      "metadata": {
        "id": "xHSIK3gLjsVA",
        "outputId": "ea68f7e7-2509-46a7-8933-0d99814c9c43",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Transcribing file 0: 'nana_speech.wav' to segments...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2023-09-09 07:30:22,206 - modelscope - INFO - Use user-specified model revision: v1.0.7\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2023-09-09 07:30:30\n",
            "TTS-AutoLabel version: 1.1.8\n",
            "TTS-AutoLabel resource path: /root/.cache/modelscope/hub/damo/speech_ptts_autolabel_16k/model\n",
            "Target sampling rate: 16000\n",
            "Input wav dir: /content/Personal-TTS-v2/test_wavs\n",
            "Output data dir: /content/Personal-TTS-v2/output_training_data\n",
            "wav_preprocess start...\n",
            "---  new folder...  ---\n",
            "---  OK  ---\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 34/34 [00:00<00:00, 112.07it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[VAD] chunk recordings for training.\n",
            "wav cut by vad start...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/34 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_6_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r  3%|▎         | 1/34 [00:00<00:10,  3.11it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_9_S0000 is no need to cut\n",
            "wav 0_14_S0000 is no need to cut\n",
            "wav 0_3_S0000 is no need to cut\n",
            "wav 0_17_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 15%|█▍        | 5/34 [00:00<00:02, 13.83it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_5_S0000 is no need to cut\n",
            "wav 0_32_S0000 is no need to cut\n",
            "wav 0_0_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 24%|██▎       | 8/34 [00:00<00:01, 17.31it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_13_S0000 is no need to cut\n",
            "wav 0_27_S0000 is no need to cut\n",
            "wav 0_2_S0000 is no need to cut\n",
            "wav 0_11_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 35%|███▌      | 12/34 [00:00<00:01, 21.94it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_12_S0000 is no need to cut\n",
            "wav 0_1_S0000 is no need to cut\n",
            "wav 0_16_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 44%|████▍     | 15/34 [00:00<00:00, 23.32it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_23_S0000 is no need to cut\n",
            "wav 0_8_S0000 is no need to cut\n",
            "VAD:  0_26_S0000.wav has 17920 samples, shorter than expected samples,                            skipping saving in S\n",
            "wav 0_22_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 56%|█████▌    | 19/34 [00:00<00:00, 27.22it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_33_S0000 is no need to cut\n",
            "wav 0_29_S0000 is no need to cut\n",
            "wav 0_28_S0000 is no need to cut\n",
            "wav 0_20_S0000 is no need to cut\n",
            "wav 0_10_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 71%|███████   | 24/34 [00:01<00:00, 33.14it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "VAD:  0_30_S0000.wav has 10560 samples, shorter than expected samples,                            skipping saving in S\n",
            "wav 0_18_S0000 is no need to cut\n",
            "wav 0_21_S0000 is no need to cut\n",
            "VAD:  0_15_S0000.wav has 11200 samples, shorter than expected samples,                            skipping saving in S\n",
            "wav 0_31_S0000 is no need to cut\n",
            "VAD:  0_24_S0000.wav has 17920 samples, shorter than expected samples,                            skipping saving in S\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 88%|████████▊ | 30/34 [00:01<00:00, 39.36it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav 0_25_S0000 is no need to cut\n",
            "wav 0_4_S0000 is no need to cut\n",
            "wav 0_19_S0000 is no need to cut\n",
            "wav 0_7_S0000 is no need to cut\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 34/34 [00:01<00:00, 27.87it/s]\n",
            "0it [00:00, ?it/s]\n",
            "0it [00:00, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "---  new folder...  ---\n",
            "---  OK  ---\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 199.00it/s]\n",
            "100%|██████████| 30/30 [00:01<00:00, 20.81it/s]\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Text to label start...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 30/30 [00:04<00:00,  6.14it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "pre-break recording in paragraph by vad.\n",
            "Generate phone interval by fa align.\n",
            "prosody_dir=/content/Personal-TTS-v2/output_training_data/paragraph/prosody\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "FA processing...\n",
            "---  New folder /content/Personal-TTS-v2/output_training_data/raw_ali...  ---\n",
            "---  OK  ---\n",
            "---  New folder /content/Personal-TTS-v2/output_training_data/raw_interval...  ---\n",
            "---  OK  ---\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r  0%|          | 0/30 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_14_S0000.wav, text: 长出 厚厚的 剪子 给 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r  3%|▎         | 1/30 [00:00<00:03,  7.62it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_4_S0000.wav, text: 我 是 从 六岁 开始 学琴的 所以 就 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_31_S0000.wav, text: 他有 这样的 经验吧 就是 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_1_S0000.wav, text: 八岁的 时候 立志 成为 一个 像 马游勇 一样的 大提琴 演奏家在 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 13%|█▎        | 4/30 [00:00<00:01, 17.10it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_19_S0000.wav, text: 右手 要 无限的 撑开 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_0_S0000.wav, text: 在 十二岁 以前呢 我 从来 没有 想过 自己 会 演戏 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_18_S0000.wav, text: 在在 恋情的 过程 当中呢 你的左 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 23%|██▎       | 7/30 [00:00<00:01, 21.37it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_23_S0000.wav, text: 这 这个 是我 一直 在跟 这些 小朋友 讲了 讲的 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_20_S0000.wav, text: 穿 这样的 东西 所 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_10_S0000.wav, text: 像 火烧 一样 疼 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_9_S0000.wav, text: 练完 一段 时间 之后 就会 发现 手指 开始 发红 发肿像 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 37%|███▋      | 11/30 [00:00<00:00, 23.82it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_13_S0000.wav, text: 要 练到 直到 第一 层皮 退下去 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_2_S0000.wav, text: 在 非常 神圣的 音乐厅 里面 用 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_11_S0000.wav, text: 因为 恋情 这种 事情 是 不能 间断的 一恋 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 47%|████▋     | 14/30 [00:00<00:00, 22.68it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_12_S0000.wav, text: 就练 好几个 小时 练 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_17_S0000.wav, text: 左手 要比 右手 大 那么 一点 点因 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_16_S0000.wav, text: 说 两只手 是 不同 大小的 我的 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_28_S0000.wav, text: 就 很好胜的 人 我觉 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 60%|██████    | 18/30 [00:00<00:00, 25.95it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_22_S0000.wav, text: 比 学历 更重要啊 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_21_S0000.wav, text: 所以 你我 觉得 人 要有 一技之长呢 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_8_S0000.wav, text: 手指 要 不停的 在 纸板 上面 摩擦 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 70%|███████   | 21/30 [00:00<00:00, 26.00it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_3_S0000.wav, text: 音乐 感染 每一个人 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_25_S0000.wav, text: 今天 是在 学校嘛 其住 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_33_S0000.wav, text: 被 公车门 夹到 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_7_S0000.wav, text: 开始 练琴的 时候 因为 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_29_S0000.wav, text: 拿个 很简单的 例子 好了 打光 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 87%|████████▋ | 26/30 [00:01<00:00, 29.90it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_5_S0000.wav, text: 其实 我 特别 能 理解 那些 中途 想要 放弃 逃跑的 一些 学生 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_6_S0000.wav, text: 对 恋情 这件 事情 太不 容易了 刚开 \n",
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_27_S0000.wav, text: 也 相信 大家 都 看得 出来 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r 97%|█████████▋| 29/30 [00:01<00:00, 24.09it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "wav: /content/Personal-TTS-v2/output_training_data/wav_cut_16k/0_32_S0000.wav, text: 人 很多的 时候 被 挤到 最后 然后 \n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r100%|██████████| 30/30 [00:01<00:00, 23.94it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "---  There is this folder!  ---\n",
            "0_27_S0000.ali\n",
            "0_31_S0000.ali\n",
            "0_9_S0000.ali\n",
            "0_32_S0000.ali\n",
            "0_18_S0000.ali\n",
            "0_5_S0000.ali\n",
            "0_33_S0000.ali\n",
            "0_8_S0000.ali\n",
            "0_4_S0000.ali\n",
            "0_20_S0000.ali\n",
            "0_0_S0000.ali\n",
            "0_7_S0000.ali\n",
            "0_28_S0000.ali\n",
            "0_3_S0000.ali\n",
            "0_14_S0000.ali\n",
            "0_6_S0000.ali\n",
            "0_2_S0000.ali\n",
            "0_19_S0000.ali\n",
            "0_12_S0000.ali\n",
            "0_23_S0000.ali\n",
            "0_22_S0000.ali\n",
            "0_21_S0000.ali\n",
            "0_25_S0000.ali\n",
            "0_11_S0000.ali\n",
            "0_10_S0000.ali\n",
            "0_29_S0000.ali\n",
            "0_13_S0000.ali\n",
            "0_16_S0000.ali\n",
            "0_17_S0000.ali\n",
            "0_1_S0000.ali\n",
            "---  New folder /content/Personal-TTS-v2/output_training_data/ali...  ---\n",
            "---  OK  ---\n",
            "---  New folder /content/Personal-TTS-v2/output_training_data/coarse_interval...  ---\n",
            "---  OK  ---\n",
            "Trim silence wav with align info and modify wav files....\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 146.67it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Convert align info to interval files....\n",
            "---  There is this folder!  ---\n",
            "0_27_S0000.ali\n",
            "0_31_S0000.ali\n",
            "0_9_S0000.ali\n",
            "0_32_S0000.ali\n",
            "0_18_S0000.ali\n",
            "0_5_S0000.ali\n",
            "0_33_S0000.ali\n",
            "0_8_S0000.ali\n",
            "0_4_S0000.ali\n",
            "0_20_S0000.ali\n",
            "0_0_S0000.ali\n",
            "0_7_S0000.ali\n",
            "0_28_S0000.ali\n",
            "0_3_S0000.ali\n",
            "0_14_S0000.ali\n",
            "0_6_S0000.ali\n",
            "0_2_S0000.ali\n",
            "0_19_S0000.ali\n",
            "0_12_S0000.ali\n",
            "0_23_S0000.ali\n",
            "0_22_S0000.ali\n",
            "0_21_S0000.ali\n",
            "0_25_S0000.ali\n",
            "0_11_S0000.ali\n",
            "0_10_S0000.ali\n",
            "0_29_S0000.ali\n",
            "0_13_S0000.ali\n",
            "0_16_S0000.ali\n",
            "0_17_S0000.ali\n",
            "0_1_S0000.ali\n",
            "qualification review.\n",
            "prosody sillence detect.\n",
            "--- Remove /content/Personal-TTS-v2/output_training_data/prosody folder!  ---\n",
            "---  New folder /content/Personal-TTS-v2/output_training_data/prosody...  ---\n",
            "---  OK  ---\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 60/60 [00:00<00:00, 5585.33it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Write prosody file\n",
            "0 \"mismatch\" sentences\n",
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Trim sp started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 30/30 [00:00<00:00, 151.59it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Trim sp finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Auto labeling info: stage 1 | develop mode 0 | gender:female | score 10.000000 | retcode 0\n",
            "labeling report:\n",
            "stage 1 | develop mode 0 | gender female | score 10.000000 | retcode 0\n",
            "qulification report:\n",
            "credit score: 10.000000\n",
            "qualified score: 3.000000\n",
            "normalized snr: 35.000000\n",
            "abandon utt snr threshold: 10.000000\n",
            "snr score ration: 0.500000\n",
            "interval score ration: 0.500000\n",
            "data qulificaion report:\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2023-09-09 07:30:49,683 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
            "2023-09-09 07:30:51,894 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
            "2023-09-09 07:30:52,717 - modelscope - INFO - Set workdir to /content/Personal-TTS-v2/pretrain_work_dir/\n",
            "2023-09-09 07:30:52,741 - modelscope - INFO - load /content/Personal-TTS-v2/output_training_data/\n",
            "2023-09-09 07:30:53,292 - modelscope - INFO - Use user-specified model revision: v1.0.6\n",
            "2023-09-09 07:30:55,738 - modelscope - INFO - am_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/config.yaml voc_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/config.yaml\n",
            "2023-09-09 07:30:55,740 - modelscope - INFO - audio_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/audio_config_se_16k.yaml\n",
            "2023-09-09 07:30:55,740 - modelscope - INFO - am_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/ckpt/checkpoint_2400000.pth')])\n",
            "2023-09-09 07:30:55,741 - modelscope - INFO - voc_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/ckpt/checkpoint_2400000.pth')])\n",
            "2023-09-09 07:30:55,742 - modelscope - INFO - se_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/se.npy se_model_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/speaker_embedding/se.onnx\n",
            "2023-09-09 07:30:55,743 - modelscope - INFO - mvn_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/mvn.npy\n",
            "100%|██████████| 60/60 [00:00<00:00, 6437.10it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "TextScriptConvertor.process:\n",
            "Save script to: /content/Personal-TTS-v2/pretrain_work_dir/data/Script.xml\n",
            "TextScriptConvertor.process:\n",
            "Save metafile to: /content/Personal-TTS-v2/pretrain_work_dir/data/raw_metafile.txt\n",
            "[AudioProcessor] Initialize AudioProcessor.\n",
            "[AudioProcessor] config params:\n",
            "[AudioProcessor] wav_normalize: True\n",
            "[AudioProcessor] trim_silence: True\n",
            "[AudioProcessor] trim_silence_threshold_db: 60\n",
            "[AudioProcessor] preemphasize: False\n",
            "[AudioProcessor] sampling_rate: 16000\n",
            "[AudioProcessor] hop_length: 200\n",
            "[AudioProcessor] win_length: 1000\n",
            "[AudioProcessor] n_fft: 2048\n",
            "[AudioProcessor] n_mels: 80\n",
            "[AudioProcessor] fmin: 0.0\n",
            "[AudioProcessor] fmax: 8000.0\n",
            "[AudioProcessor] phone_level_feature: True\n",
            "[AudioProcessor] se_feature: True\n",
            "[AudioProcessor] norm_type: mean_std\n",
            "[AudioProcessor] max_norm: 1.0\n",
            "[AudioProcessor] symmetric: False\n",
            "[AudioProcessor] min_level_db: -100.0\n",
            "[AudioProcessor] ref_level_db: 20\n",
            "[AudioProcessor] num_workers: 16\n",
            "[AudioProcessor] Amplitude normalization started\n",
            "Volume statistic proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 85.39it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Average amplitude RMS : 0.13875506666666668\n",
            "Volume statistic done.\n",
            "Volume normalization proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 1784.76it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Volume normalization done.\n",
            "[AudioProcessor] Amplitude normalization finished\n",
            "[AudioProcessor] Duration generation started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/30 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Duration align with mel is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 30/30 [00:00<00:00, 44.11it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Duration generate finished\n",
            "[AudioProcessor] Trim silence with interval started\n",
            "[AudioProcessor] Start to load pcm from /content/Personal-TTS-v2/pretrain_work_dir/data/wav\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 43.33it/s]\n",
            "  0%|          | 0/30 [00:00<?, ?it/s]\n",
            "100%|██████████| 30/30 [00:00<00:00, 2887.38it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Trim silence finished\n",
            "[AudioProcessor] Melspec extraction started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:01<00:00, 22.52it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Melspec extraction finished\n",
            "Melspec statistic proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 27497.62it/s]\n",
            "100%|██████████| 30/30 [00:00<00:00, 6936.17it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Melspec statistic done\n",
            "[AudioProcessor] melspec mean and std saved to:\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/mel/mel_mean.txt,\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/mel/mel_std.txt\n",
            "[AudioProcessor] Melspec mean std norm is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Melspec normalization finished\n",
            "[AudioProcessor] Normed Melspec saved to /content/Personal-TTS-v2/pretrain_work_dir/data/mel\n",
            "[Duration Calibrating] Syllable duration 34                        is not equal to the number of symbols 33, index: 0_8_S0000\n",
            "[AudioProcessor] Pitch extraction started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\r  0%|          | 0/30 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Pitch align with mel is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 30/30 [00:01<00:00, 29.37it/s]\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Pitch normalization is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 30/30 [00:00<00:00, 23506.28it/s]\n",
            "100%|██████████| 30/30 [00:00<00:00, 22270.64it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] f0 mean and std saved to:\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/f0/f0_mean.txt,\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/f0/f0_std.txt\n",
            "[AudioProcessor] Pitch mean std norm is proceeding...\n",
            "[AudioProcessor] Pitch turn to phone-level is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 42.49it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Pitch normalization finished\n",
            "[AudioProcessor] Normed f0 saved to /content/Personal-TTS-v2/pretrain_work_dir/data/f0\n",
            "[AudioProcessor] Pitch extraction finished\n",
            "[AudioProcessor] Energy extraction started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 36.21it/s]\n",
            "100%|██████████| 30/30 [00:00<00:00, 35757.07it/s]\n",
            "100%|██████████| 30/30 [00:00<00:00, 23488.73it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] energy mean and std saved to:\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/energy/energy_mean.txt,\n",
            "/content/Personal-TTS-v2/pretrain_work_dir/data/energy/energy_std.txt\n",
            "[AudioProcessor] Energy mean std norm is proceeding...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 30/30 [00:00<00:00, 43.47it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[AudioProcessor] Energy normalization finished\n",
            "[AudioProcessor] Normed Energy saved to /content/Personal-TTS-v2/pretrain_work_dir/data/energy\n",
            "[AudioProcessor] Energy extraction finished\n",
            "[AudioProcessor] All features extracted successfully!\n",
            "Processing audio done.\n",
            "[SpeakerEmbeddingProcessor] Speaker embedding extractor started\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[SpeakerEmbeddingProcessor] se model loading error!!!\n",
            "[SpeakerEmbeddingProcessor] please update your se model to ensure that the version is greater than or equal to 1.0.5\n",
            "[SpeakerEmbeddingProcessor] try load it as se.model\n",
            "[SpeakerEmbeddingProcessor] Speaker embedding extracted successfully!\n",
            "Processing speaker embedding done.\n",
            "Processing done.\n",
            "Voc metafile generated.\n",
            "AM metafile generated.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "2023-09-09 07:32:01,238 - modelscope - INFO - Start training....\n",
            "2023-09-09 07:32:01,239 - modelscope - INFO - Start SAMBERT training...\n",
            "2023-09-09 07:32:01,240 - modelscope - INFO - TRAIN SAMBERT....\n",
            "2023-09-09 07:32:01,254 - modelscope - INFO - TRAINING steps: 2400052\n",
            "2023-09-09 07:32:01,261 - modelscope - INFO - audio_config = {'fmax': 8000.0, 'fmin': 0.0, 'hop_length': 200, 'max_norm': 1.0, 'min_level_db': -100.0, 'n_fft': 2048, 'n_mels': 80, 'norm_type': 'mean_std', 'num_workers': 16, 'phone_level_feature': True, 'preemphasize': False, 'ref_level_db': 20, 'sampling_rate': 16000, 'symmetric': False, 'trim_silence': True, 'trim_silence_threshold_db': 60, 'wav_normalize': True, 'win_length': 1000}\n",
            "2023-09-09 07:32:01,262 - modelscope - INFO - Loss = {'MelReconLoss': {'enable': True, 'params': {'loss_type': 'mae'}}, 'ProsodyReconLoss': {'enable': True, 'params': {'loss_type': 'mae'}}}\n",
            "2023-09-09 07:32:01,263 - modelscope - INFO - Model = {'KanTtsSAMBERT': {'optimizer': {'params': {'betas': [0.9, 0.98], 'eps': 1e-09, 'lr': 0.001, 'weight_decay': 0.0}, 'type': 'Adam'}, 'params': {'MAS': False, 'NSF': True, 'SE': True, 'decoder_attention_dropout': 0.1, 'decoder_dropout': 0.1, 'decoder_ffn_inner_dim': 1024, 'decoder_num_heads': 8, 'decoder_num_layers': 12, 'decoder_num_units': 128, 'decoder_prenet_units': [256, 256], 'decoder_relu_dropout': 0.1, 'dur_pred_lstm_units': 128, 'dur_pred_prenet_units': [128, 128], 'embedding_dim': 512, 'emotion_units': 32, 'encoder_attention_dropout': 0.1, 'encoder_dropout': 0.1, 'encoder_ffn_inner_dim': 1024, 'encoder_num_heads': 8, 'encoder_num_layers': 8, 'encoder_num_units': 128, 'encoder_projection_units': 32, 'encoder_relu_dropout': 0.1, 'max_len': 800, 'nsf_f0_global_maximum': 730.0, 'nsf_f0_global_minimum': 30.0, 'nsf_norm_type': 'global', 'num_mels': 82, 'outputs_per_step': 3, 'postnet_dropout': 0.1, 'postnet_ffn_inner_dim': 512, 'postnet_filter_size': 41, 'postnet_fsmn_num_layers': 4, 'postnet_lstm_units': 128, 'postnet_num_memory_units': 256, 'postnet_shift': 17, 'predictor_dropout': 0.1, 'predictor_ffn_inner_dim': 256, 'predictor_filter_size': 41, 'predictor_fsmn_num_layers': 3, 'predictor_lstm_units': 128, 'predictor_num_memory_units': 128, 'predictor_shift': 0, 'speaker_units': 192}, 'scheduler': {'params': {'warmup_steps': 4000}, 'type': 'NoamLR'}}}\n",
            "2023-09-09 07:32:01,264 - modelscope - INFO - allow_cache = False\n",
            "2023-09-09 07:32:01,264 - modelscope - INFO - batch_size = 32\n",
            "2023-09-09 07:32:01,265 - modelscope - INFO - create_time = 2023-09-09 07:32:01\n",
            "2023-09-09 07:32:01,266 - modelscope - INFO - eval_interval_steps = 10000000000000000\n",
            "2023-09-09 07:32:01,267 - modelscope - INFO - git_revision_hash = d16755444c9baf23348213211a5ed9035458ecf0\n",
            "2023-09-09 07:32:01,268 - modelscope - INFO - grad_norm = 1.0\n",
            "2023-09-09 07:32:01,269 - modelscope - INFO - linguistic_unit = {'cleaners': 'english_cleaners', 'lfeat_type_list': 'sy,tone,syllable_flag,word_segment,emo_category,speaker_category', 'speaker_list': 'F7'}\n",
            "2023-09-09 07:32:01,275 - modelscope - INFO - log_interval_steps = 50\n",
            "2023-09-09 07:32:01,276 - modelscope - INFO - model_type = sambert\n",
            "2023-09-09 07:32:01,278 - modelscope - INFO - num_save_intermediate_results = 4\n",
            "2023-09-09 07:32:01,279 - modelscope - INFO - num_workers = 4\n",
            "2023-09-09 07:32:01,280 - modelscope - INFO - pin_memory = False\n",
            "2023-09-09 07:32:01,281 - modelscope - INFO - remove_short_samples = False\n",
            "2023-09-09 07:32:01,284 - modelscope - INFO - save_interval_steps = 50\n",
            "2023-09-09 07:32:01,285 - modelscope - INFO - train_max_steps = 2400052\n",
            "2023-09-09 07:32:01,286 - modelscope - INFO - train_steps = 52\n",
            "2023-09-09 07:32:01,287 - modelscope - INFO - log_interval = 10\n",
            "2023-09-09 07:32:01,288 - modelscope - INFO - modelscope_version = 1.9.0\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loading metafile...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 28/28 [00:00<00:00, 28883.55it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loading metafile...\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00, 5236.33it/s]\n",
            "2023-09-09 07:32:01,307 - modelscope - INFO - The number of training files = 28.\n",
            "2023-09-09 07:32:01,308 - modelscope - INFO - The number of validation files = 1.\n",
            "2023-09-09 07:32:02,161 - modelscope - INFO - Successfully resumed from /content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/sambert/ckpt/checkpoint_2400000.pth.\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Checkpoint saved at step 2400000\n",
            "(Steps: 2400000) train/TotalLoss = 0.0395.\n",
            "(Steps: 2400000) train/mel_loss_ = 0.0091.\n",
            "(Steps: 2400000) train/mel_loss = 0.0081.\n",
            "(Steps: 2400000) train/dur_loss = 0.0056.\n",
            "(Steps: 2400000) train/pitch_loss = 0.0076.\n",
            "(Steps: 2400000) train/energy_loss = 0.0091.\n",
            "(Steps: 2400000) train/batch_size = 0.5600.\n",
            "(Steps: 2400000) train/x_band_width = 0.4200.\n",
            "(Steps: 2400000) train/h_band_width = 0.4200.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:02<00:00,  2.63s/it]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 0 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.65it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 1 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.72it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 2 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.65it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 3 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.75it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 4 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.61it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 5 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.73it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 6 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.55it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 7 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.66it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 8 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.66it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 9 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(Steps: 2400010) train/TotalLoss = 0.3747.\n",
            "(Steps: 2400010) train/mel_loss_ = 0.0853.\n",
            "(Steps: 2400010) train/mel_loss = 0.0710.\n",
            "(Steps: 2400010) train/dur_loss = 0.0537.\n",
            "(Steps: 2400010) train/pitch_loss = 0.0752.\n",
            "(Steps: 2400010) train/energy_loss = 0.0896.\n",
            "(Steps: 2400010) train/batch_size = 5.6000.\n",
            "(Steps: 2400010) train/x_band_width = 4.2000.\n",
            "(Steps: 2400010) train/h_band_width = 4.2000.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:00<00:00,  1.60it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 10 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.61it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 11 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.63it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 12 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.65it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 13 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.69it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 14 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.72it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 15 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.73it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 16 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.72it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 17 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.78it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 18 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.72it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 19 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(Steps: 2400020) train/TotalLoss = 0.3435.\n",
            "(Steps: 2400020) train/mel_loss_ = 0.0777.\n",
            "(Steps: 2400020) train/mel_loss = 0.0616.\n",
            "(Steps: 2400020) train/dur_loss = 0.0474.\n",
            "(Steps: 2400020) train/pitch_loss = 0.0709.\n",
            "(Steps: 2400020) train/energy_loss = 0.0859.\n",
            "(Steps: 2400020) train/batch_size = 5.6000.\n",
            "(Steps: 2400020) train/x_band_width = 4.2000.\n",
            "(Steps: 2400020) train/h_band_width = 4.2000.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:00<00:00,  1.66it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 20 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.76it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 21 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.76it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 22 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.80it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 23 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.73it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 24 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.74it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 25 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.71it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 26 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.77it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 27 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.78it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 28 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.77it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 29 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(Steps: 2400030) train/TotalLoss = 0.3183.\n",
            "(Steps: 2400030) train/mel_loss_ = 0.0735.\n",
            "(Steps: 2400030) train/mel_loss = 0.0567.\n",
            "(Steps: 2400030) train/dur_loss = 0.0422.\n",
            "(Steps: 2400030) train/pitch_loss = 0.0639.\n",
            "(Steps: 2400030) train/energy_loss = 0.0821.\n",
            "(Steps: 2400030) train/batch_size = 5.6000.\n",
            "(Steps: 2400030) train/x_band_width = 4.2000.\n",
            "(Steps: 2400030) train/h_band_width = 4.2000.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:00<00:00,  1.71it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 30 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.72it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 31 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.65it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 32 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.62it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 33 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.67it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 34 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.67it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 35 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.63it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 36 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.67it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 37 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.58it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 38 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.61it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 39 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(Steps: 2400040) train/TotalLoss = 0.2936.\n",
            "(Steps: 2400040) train/mel_loss_ = 0.0707.\n",
            "(Steps: 2400040) train/mel_loss = 0.0540.\n",
            "(Steps: 2400040) train/dur_loss = 0.0382.\n",
            "(Steps: 2400040) train/pitch_loss = 0.0518.\n",
            "(Steps: 2400040) train/energy_loss = 0.0789.\n",
            "(Steps: 2400040) train/batch_size = 5.6000.\n",
            "(Steps: 2400040) train/x_band_width = 4.2000.\n",
            "(Steps: 2400040) train/h_band_width = 4.2000.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:00<00:00,  1.55it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 40 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.65it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 41 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.67it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 42 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.70it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 43 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.74it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 44 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.74it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 45 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.78it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 46 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.75it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 47 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.76it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 48 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "100%|██████████| 1/1 [00:00<00:00,  1.76it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 49 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Checkpoint saved at step 2400050\n",
            "(Steps: 2400050) train/TotalLoss = 0.2746.\n",
            "(Steps: 2400050) train/mel_loss_ = 0.0687.\n",
            "(Steps: 2400050) train/mel_loss = 0.0518.\n",
            "(Steps: 2400050) train/dur_loss = 0.0354.\n",
            "(Steps: 2400050) train/pitch_loss = 0.0426.\n",
            "(Steps: 2400050) train/energy_loss = 0.0760.\n",
            "(Steps: 2400050) train/batch_size = 5.6000.\n",
            "(Steps: 2400050) train/x_band_width = 4.2000.\n",
            "(Steps: 2400050) train/h_band_width = 4.2000.\n",
            "KanTtsSAMBERT learning rate: 0.000082\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 1/1 [00:00<00:00,  1.10it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 50 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "  0%|          | 0/1 [00:00<?, ?it/s]"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Epoch 51 finished\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "\n",
            "2023-09-09 07:32:35,661 - modelscope - INFO - SAMBERT training spent: 0.01 hours\n",
            "\n",
            "2023-09-09 07:32:35,662 - modelscope - INFO - skip HIFIGAN training...\n",
            "2023-09-09 07:32:35,814 - modelscope - INFO - am_config=/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/config.yaml voc_config=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/config.yaml\n",
            "2023-09-09 07:32:35,815 - modelscope - INFO - audio_config=/content/Personal-TTS-v2/pretrain_work_dir/data/audio_config.yaml\n",
            "2023-09-09 07:32:35,816 - modelscope - INFO - am_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/ckpt/checkpoint_2400000.pth'), (2400050, '/content/Personal-TTS-v2/pretrain_work_dir/tmp_am/ckpt/checkpoint_2400050.pth')])\n",
            "2023-09-09 07:32:35,818 - modelscope - INFO - voc_ckpts=OrderedDict([(2400000, '/content/Personal-TTS-v2/pretrain_work_dir/orig_model/basemodel_16k/hifigan/ckpt/checkpoint_2400000.pth')])\n",
            "2023-09-09 07:32:35,819 - modelscope - INFO - se_path=/content/Personal-TTS-v2/pretrain_work_dir/data/se/se.npy se_model_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/se.onnx\n",
            "2023-09-09 07:32:35,820 - modelscope - INFO - mvn_path=/content/Personal-TTS-v2/pretrain_work_dir/orig_model/mvn.npy\n",
            "2023-09-09 07:32:40,074 - modelscope - WARNING - No preprocessor field found in cfg.\n",
            "2023-09-09 07:32:40,076 - modelscope - WARNING - No val key and type key found in preprocessor domain of configuration.json file.\n",
            "2023-09-09 07:32:40,077 - modelscope - WARNING - Cannot find available config to build preprocessor at mode inference, current config: {'model_dir': '/content/Personal-TTS-v2/pretrain_work_dir/orig_model'}. trying to build by task and model information.\n",
            "2023-09-09 07:32:40,078 - modelscope - WARNING - No preprocessor key ('sambert-hifigan', 'text-to-speech') found in PREPROCESSOR_MAP, skip building preprocessor.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Removing weight norm...\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'69792480141myfile.wav'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "app = gr.Blocks()\n",
        "\n",
        "with app:\n",
        "    gr.Markdown(\"# <center>🥳🎶🎡 - Sambert中文声音克隆</center>\")\n",
        "    gr.Markdown(\"## <center>🌟 - 训练3分钟，推理5秒钟，中英自然发音 </center>\")\n",
        "    gr.Markdown(\"### <center>🌊 - 更多精彩应用，敬请关注[滔滔AI](http://www.talktalkai.com)；滔滔AI，为爱滔滔！💕</center>\")\n",
        "\n",
        "    with gr.Row():\n",
        "      inp1 = gr.Audio(type=\"filepath\", label=\"请上传一段音频\")\n",
        "      out1 = gr.Textbox(label=\"标注情况\", lines=1, interactive=False)\n",
        "\n",
        "      out2 = gr.Textbox(label=\"训练情况\", lines=1, interactive=False)\n",
        "      inp2 = gr.Textbox(label=\"文本\", lines=3)\n",
        "      out3 = gr.Audio(type=\"filepath\", label=\"合成的音频\")\n",
        "      btn1 = gr.Button(\"1.标注数据\")\n",
        "      btn2 = gr.Button(\"2.开始训练\")\n",
        "      btn3 = gr.Button(\"3.一键推理\", variant=\"primary\")\n",
        "\n",
        "      btn1.click(auto_label, inp1, out1)\n",
        "      btn2.click(train, out1, out2)\n",
        "      btn3.click(infer, inp2, out3)\n",
        "\n",
        "    gr.Markdown(\"### <center>注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。</center>\")\n",
        "    gr.HTML('''\n",
        "        <div class=\"footer\">\n",
        "                    <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘\n",
        "                    </p>\n",
        "        </div>\n",
        "    ''')\n",
        "app.launch(show_error=True, share=True)"
      ],
      "metadata": {
        "id": "lCW2U4EuiEKD",
        "outputId": "7ee9d670-3ed5-40ba-af79-e9b53241bb9d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 612
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
            "Running on public URL: https://c20364d25b1112c758.gradio.live\n",
            "\n",
            "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "<div><iframe src=\"https://c20364d25b1112c758.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": []
          },
          "metadata": {},
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "PhKxBhVdiERs"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}